suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(hablar))
suppressPackageStartupMessages(library(tsibble))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggbiplot))

The module2 of the survey on the perception of risk and benefit from energy technologies in India.

Following is the complete data set with 1099 responses.

moduletwo <- read_excel( "~/Desktop/thesis_surveydata/political_factors_complete.xlsx")
save(moduletwo, file = "moduletwo.rda")
module2 <- data.frame(moduletwo)
module2 %>%
  filter(!row_number() %in% c(1,2))
# loaded data set for module 2 eco -pol variables and removes the rows with question number and question text

Following graphs show the distribution of percerived risk and benefit on the likert scale

#plotting perceived risk from different technologies

module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("RISKY"))%>%
  gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
  separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
  select(-Risky) %>%
  group_by(Technology, Perceived_Risk) %>%
  dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplot<-module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("RISKY"))%>%
  gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
  separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
  select(-Risky) %>% 
  group_by(Technology, Perceived_Risk) %>%
  dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplot %>% ggplot(aes(fill= fct_rev(factor(Perceived_Risk, levels=c("Extremely risky", "Very risky","Moderately risky","Slightly risky","Not at all risky","Rather not say/Don't know"))), y=n , x=Technology)) + 
  geom_bar(position="fill",stat="identity")+
  scale_x_discrete(limits = c("Solar", "INDSolar", "INDFirewoodetc" ,"Wind" , "Hydro", "INDWind", "INDHydro", "INDDiesel" , "INDBiogas","INDKerosene", "Oil", "Coal","Gas", "INDLPG","Nuclear"))+
  scale_fill_manual("legend", values = c("Extremely risky" = "#8C4646", "Very risky" = "#D9695F", "Moderately risky" = "#F2A679", "Slightly risky" = "#F2D091","Not at all risky" = "#5D8C7B", "Rather not say/Don't know" = "#808080"))+
  coord_flip()+
  theme_classic()

colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")

df_barplotben <- module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("Ben"))%>%
  gather(Technology, Perceived_Benefit, Ben_Hydro:Ben_INDKerosene, factor_key = TRUE)%>%
  separate(Technology, c("Ben", "Technology"), extra = "merge", fill = "left")%>%
  select(-Ben) %>% 
  group_by(Technology, Perceived_Benefit) %>%
  dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplotben %>% ggplot(aes(fill= factor(Perceived_Benefit, levels=c("Rather not say/Don't know","Not at all beneficial","Slightly beneficial","Moderately beneficial","Very beneficial", "Extremely beneficial")), y=n , x=Technology)) + 
  geom_bar(position="fill",stat="identity")+
  scale_fill_manual("legend", values = c("Rather not say/Don't know" = "grey94" ,"Not at all beneficial" = "orange","Slightly beneficial"="yellow" ,"Moderately beneficial" = "palegreen","Very beneficial"= "green3", "Extremely beneficial"= "green4"))+
  theme_classic()+
  coord_flip()

# fix Rather not say issue.

Now I am converting the likert scale for various scales used in the survey to numeric values from 1 to 5, Rather Not Say/Don’t know is printed as NA. Some variables had to be reverse coded as well.

#This chunk contains the coding schemes of various scales used in survey one: eco-political factors, kahan scale and acceptance scale
codedmodule2 <- module2 %>%
  
#remove row 1
  filter(!row_number() %in% c(1,2)) %>% 
  
# replace risky likert scale with numbers
  mutate_at(vars(starts_with("Risky")), funs(case_when(. =="Not at all risky" ~ 1, 
                                                       . =="Slightly risky" ~ 2, 
                                                       . =="Moderately risky" ~ 3, 
                                                       . =="Very risky" ~ 4, 
                                                       . =="Extremely risky" ~ 5))) %>%

# replace beneficial likert scale with numbers  
  mutate_at(vars(starts_with("Ben")), funs(case_when(. =="Not at all beneficial" ~ 1,
                                                     . =="Slightly beneficial" ~ 2,
                                                     . =="Moderately beneficial" ~ 3,
                                                     . =="Very beneficial" ~ 4,
                                                     . =="Extremely beneficial" ~ 5 ))) %>%

# replace nuclear acceptance likert scale with numbers
  mutate_at(vars(N_accept,N_reluctantlyaccept,N_reject), funs(case_when(. == "Strongly disagree" ~ 1, 
                                                                        . == "Somewhat disagree" ~ 2,
                                                                        . == "Neither agree nor disagree" ~3,
                                                                        . == "Somewhat agree" ~ 4,
                                                                        . == "Strongly agree" ~ 5))) %>%
  
# code likert scale for variables for Kahan scale into numbers
  mutate_at(vars(starts_with (c("K_I","K_H","DISPLACE", "POLLUTE", "HEALTH", "JOBS", "BEAUTY", "PRIDE", "NPRIDE", "DEV", "PROSPER", "RELY"))), funs(case_when(. == "Strongly disagree" ~ 1, 
               . == "Somewhat disagree" ~ 2,
               . == "Neither agree nor disagree" ~3,
               . == "Somewhat agree" ~ 4,
               . == "Strongly agree" ~ 5))) %>%
  
# reverse code for likert scale for variables for Kahan scale into numbers
  mutate_at(vars(starts_with (c("K_S","K_E"))), funs(case_when(. == "Strongly disagree" ~ 5, 
                                                               . == "Somewhat disagree" ~ 4,
                                                               . == "Neither agree nor disagree" ~3,
                                                               . == "Somewhat agree" ~ 2,
                                                               . == "Strongly agree" ~ 1))) %>%

# code eco-pol scale variables into numbers
  mutate_at(vars(SYSTEMDEMO,SYSTEMRELIGION,SYSTEMTECHNO,SYSTEMTOTAL,WEALTHLIM,MECHANISATION,DECISIONDECEN,INDUSTRYLARGE,ECONOMYGLOBAL,OWNERPVT,OWNERNOREG), funs(case_when(. == "Strongly disagree" ~ 1, 
                        . == "Somewhat disagree" ~ 2,
                        . == "Neither agree nor disagree" ~3,
                        . == "Somewhat agree" ~ 4,
                        . == "Strongly agree" ~ 5))) %>%

# reverse code eco-pol scale variables into numbers 
 mutate_at(vars(DECISIONCEN, INDUSTRYSMALL, ECONOMYLOCAL, ENVOVERDEV,OWNERPUB, OWNERREG), funs(case_when(. == "Strongly disagree" ~ 5, 
                                                                                                         . == "Somewhat disagree" ~ 4,
                                                                                                         . == "Neither agree nor disagree" ~3,
                                                                                                         . == "Somewhat agree" ~ 2,
                                                                                                         . == "Strongly agree" ~ 1)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
codedmodule2

Following table shows the summary statistics of perceived risk from different energy technologies.

# summary statistics for risks from different energy sources

risksummary <- codedmodule2 %>%
  summarize_at(vars(starts_with(c("RISKY"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
  #add row index so later spreading indexed correctly
  add_rownames()%>%
  #melt to long format
  gather(technology, value, -rowname) %>%
  # separate risky from variable suffix
  separate(technology, c("Risky", "var"), extra = "merge", fill = "left") %>%
  #separate mean from variable prefix
  separate(var, c("technology", "summary")) %>%
  # spread summary values back to wide form
  spread(summary,value) %>%
  #clean up
  select(-rowname, -Risky) %>%
  arrange(mean) %>%
  setNames(paste0('perceivedrisk.', names(.))) %>%
  dplyr::rename(technology= perceivedrisk.technology)
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## Please use `tibble::rownames_to_column()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
risksummary

Following table shows the summary statistics of perceived benefit from different energy technologies.

# summary statistics for benefits from different energy sources

benefitsummary <- codedmodule2 %>%
  summarize_at(vars(starts_with(c("Ben"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
  #add row index so later spreading indexed correctly
  add_rownames()%>%
  #melt to long format
  gather(technology, value, -rowname) %>%
  # separate Ben from variable suffix
  separate(technology, c("Ben", "var"), extra = "merge", fill = "left") %>%
  #separate mean from variable prefix
  separate(var, c("technology", "summary")) %>%
  # spread summary values back to wide form
  spread(summary,value) %>%
  #clean up
  select(-rowname, -Ben) %>%
  arrange(mean)%>%
  setNames(paste0('perceivedbenefit.', names(.))) %>%
  dplyr::rename(technology = perceivedbenefit.technology)
benefitsummary
#round off to two decimal places

bar plots for means of perceived risk and benefit from various energy technologies

full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
  select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
  mutate(technology = fct_reorder(technology, perceivedrisk.mean, .desc = TRUE))%>%
   gather(key = "level", value = "mean",
       perceivedrisk.mean, perceivedbenefit.mean) %>%
  ggplot(aes(x=technology, y= mean, fill = level, ))+
  geom_bar(stat="identity", position=position_dodge())+
  ylim(0,4)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"

bar plots for median of perceived risk and benefit from various energy technologies

full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
  select(technology, perceivedrisk.median, perceivedbenefit.median) %>%
  mutate(technology = fct_reorder(technology, perceivedrisk.median, .desc = TRUE))%>%
   gather(key = "level", value = "median",
       perceivedrisk.median, perceivedbenefit.median) %>%
  ggplot(aes(x=technology, y= median, fill = level, ))+
  geom_bar(stat="identity", position=position_dodge())+
  ylim(0,4)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual("legend", values = c("perceivedbenefit.median" = "palegreen3", "perceivedrisk.median" = "firebrick2"))
## Joining, by = "technology"

PCA on all indepdendent variables in the data set

#PCA on all variables

pcaresults <- codedmodule2 %>%
  select(!starts_with(c("Risky","BEN", "Gender","Caste","Religion","State", "Age", "Otherreligion", "Urban_Rural","Language", "Survey_Date", "N_accept", "N_reject", "N_reluctantlyaccept")))%>%
  na.omit()%>%
  prcomp(center = TRUE, scale. = TRUE)

summary(pcaresults)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     4.0781 2.58722 2.39651 1.78412 1.58443 1.47616 1.42466
## Proportion of Variance 0.1848 0.07437 0.06381 0.03537 0.02789 0.02421 0.02255
## Cumulative Proportion  0.1848 0.25916 0.32298 0.35834 0.38624 0.41045 0.43300
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     1.38650 1.36336 1.30830 1.24685 1.21705 1.18305 1.13655
## Proportion of Variance 0.02136 0.02065 0.01902 0.01727 0.01646 0.01555 0.01435
## Cumulative Proportion  0.45436 0.47501 0.49403 0.51131 0.52776 0.54331 0.55767
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     1.12046 1.10196 1.08903 1.07521 1.07036 1.04993 1.02694
## Proportion of Variance 0.01395 0.01349 0.01318 0.01285 0.01273 0.01225 0.01172
## Cumulative Proportion  0.57162 0.58511 0.59829 0.61113 0.62386 0.63611 0.64783
##                          PC22    PC23    PC24   PC25    PC26    PC27    PC28
## Standard deviation     1.0084 0.98229 0.98054 0.9674 0.95674 0.93786 0.93124
## Proportion of Variance 0.0113 0.01072 0.01068 0.0104 0.01017 0.00977 0.00964
## Cumulative Proportion  0.6591 0.66985 0.68053 0.6909 0.70110 0.71087 0.72051
##                           PC29    PC30    PC31    PC32    PC33    PC34    PC35
## Standard deviation     0.91945 0.91874 0.90892 0.88642 0.87280 0.85548 0.85175
## Proportion of Variance 0.00939 0.00938 0.00918 0.00873 0.00846 0.00813 0.00806
## Cumulative Proportion  0.72990 0.73928 0.74846 0.75719 0.76565 0.77379 0.78185
##                           PC36    PC37    PC38    PC39    PC40    PC41    PC42
## Standard deviation     0.83638 0.82895 0.81339 0.80049 0.79529 0.78025 0.76635
## Proportion of Variance 0.00777 0.00764 0.00735 0.00712 0.00703 0.00676 0.00653
## Cumulative Proportion  0.78962 0.79725 0.80461 0.81173 0.81875 0.82552 0.83204
##                           PC43    PC44    PC45    PC46    PC47    PC48    PC49
## Standard deviation     0.75496 0.74127 0.73353 0.71927 0.70928 0.70185 0.69952
## Proportion of Variance 0.00633 0.00611 0.00598 0.00575 0.00559 0.00547 0.00544
## Cumulative Proportion  0.83838 0.84448 0.85046 0.85621 0.86180 0.86727 0.87271
##                           PC50    PC51    PC52    PC53    PC54    PC55    PC56
## Standard deviation     0.69173 0.68714 0.66536 0.65994 0.65445 0.64642 0.63999
## Proportion of Variance 0.00532 0.00525 0.00492 0.00484 0.00476 0.00464 0.00455
## Cumulative Proportion  0.87802 0.88327 0.88819 0.89303 0.89779 0.90243 0.90698
##                           PC57    PC58    PC59   PC60    PC61   PC62    PC63
## Standard deviation     0.63124 0.61434 0.60588 0.5997 0.58925 0.5847 0.57783
## Proportion of Variance 0.00443 0.00419 0.00408 0.0040 0.00386 0.0038 0.00371
## Cumulative Proportion  0.91141 0.91560 0.91968 0.9237 0.92753 0.9313 0.93504
##                           PC64    PC65    PC66    PC67    PC68    PC69    PC70
## Standard deviation     0.56166 0.56020 0.54329 0.53562 0.53535 0.53240 0.52502
## Proportion of Variance 0.00351 0.00349 0.00328 0.00319 0.00318 0.00315 0.00306
## Cumulative Proportion  0.93855 0.94204 0.94532 0.94850 0.95169 0.95484 0.95790
##                           PC71    PC72    PC73   PC74    PC75    PC76    PC77
## Standard deviation     0.51368 0.50376 0.49616 0.4927 0.47740 0.47053 0.46332
## Proportion of Variance 0.00293 0.00282 0.00274 0.0027 0.00253 0.00246 0.00239
## Cumulative Proportion  0.96083 0.96365 0.96639 0.9691 0.97162 0.97408 0.97646
##                           PC78    PC79    PC80    PC81    PC82    PC83    PC84
## Standard deviation     0.45840 0.45376 0.44332 0.42916 0.41769 0.41000 0.40037
## Proportion of Variance 0.00233 0.00229 0.00218 0.00205 0.00194 0.00187 0.00178
## Cumulative Proportion  0.97880 0.98108 0.98327 0.98531 0.98725 0.98912 0.99090
##                           PC85    PC86    PC87    PC88    PC89    PC90
## Standard deviation     0.39496 0.39030 0.36983 0.36479 0.35288 0.34095
## Proportion of Variance 0.00173 0.00169 0.00152 0.00148 0.00138 0.00129
## Cumulative Proportion  0.99263 0.99433 0.99585 0.99732 0.99871 1.00000
var_explained = pcaresults$sdev^2 / sum(pcaresults$sdev^2)

Scree plot of results from PCA

qplot(c(1:90), var_explained) + 
  geom_line() + 
  xlab("Principal Component") + 
  ylab("Variance Explained") +
  ggtitle("Scree Plot") +
  ylim(0, 0.20)+
  xlim(1,7)
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 row(s) containing missing values (geom_path).

Coding and Combining items on Kahan scale

 #median score for Kahan invidualism scale 

codedmodule2 %>%
  select((starts_with(c("K_I", "K_S"))))%>%
  gather(key = "K_I", value = "score")%>%
  na.omit()%>%
  summarise(median = median(c(score)))
# Kahan scale median score for Hierarchy scale
codedmodule2 %>%
  select((starts_with(c("K_H", "K_E"))))%>%
  gather(key = "K_H", value = "score")%>%
  na.omit()%>%
  summarise(median = median(c(score)))
#calculating individualism score and hierarchy score for each respondent

Kscalescores <- codedmodule2 %>%
  select(starts_with(c("Risky","K_I","K_S", "K_H", "K_E")))%>%
  rowwise()%>%
  na.omit()%>%
  dplyr::mutate(Individualism_score = mean(c_across(K_IINTRFER:K_SPROTECT)))%>%
  dplyr::mutate(Hierarchy_score = mean(c_across(K_HEQUAL:K_ERADEQ2)))%>%
  dplyr::select(!starts_with(c("K_I","K_S", "K_H", "K_E")))

Kscalescores
#scatter plot of Kahan scale scores around the median scores on Individualism and Hierarchy scales. 

Kscalescores %>%
  ggplot(aes(Individualism_score, Hierarchy_score))+
  geom_point()+
  geom_hline(yintercept=2, colour="black", lwd=1)+
  geom_vline(xintercept=3, colour="black", lwd=1)

# checking distribution of scores on Individualism scale 

Kscalescores %>%
  ggplot(aes(x=Individualism_score, y = ..count..), fill = "lightgray")+
  geom_density()

# checking distribution of scores on Hierarchy scale 
Kscalescores %>%
  ggplot(aes(x=Hierarchy_score, y = ..count..), fill = "lightgray")+
  geom_density()

Coding and combining items on the eco-political scale - characteristics of the perceiver

ecopolscalescores <- codedmodule2 %>%
  select(starts_with(c("Risky","SYSTEMDEMO","SYSTEMRELIGION","SYSTEMTECHNO","SYSTEMTOTAL","WEALTHLIM","MECHANISATION","DECISIONDECEN","DECISIONCEN","INDUSTRYLARGE","INDUSTRYSMALL","ECONOMYGLOBAL","ECONOMYLOCAL", "ENVOVERDEV","DEVOVERENV","OWNERPVT","OWNERNOREG","OWNERPUB", "OWNERREG", "Gender","Caste","Religion", "State")))%>%
  rowwise()%>%
  na.omit()%>%
  dplyr::mutate(Pro_decentralisation = mean(c_across("DECISIONDECEN":"DECISIONCEN")))%>%
   dplyr::mutate(Pro_largescale = mean(c_across("INDUSTRYLARGE":"INDUSTRYSMALL")))%>%
   dplyr::mutate(Pro_global = mean(c_across("ECONOMYGLOBAL":"ECONOMYLOCAL")))%>%
   dplyr::mutate(Pro_ecogrowth = mean(c_across("ENVOVERDEV":"DEVOVERENV")))%>%
   dplyr::mutate(Pro_private = mean(c_across("OWNERPVT":"OWNERREG")))%>%
  dplyr::select(!c("DECISIONDECEN","DECISIONCEN", "INDUSTRYLARGE","INDUSTRYSMALL","ECONOMYGLOBAL", "ECONOMYLOCAL","ENVOVERDEV",     "DEVOVERENV","OWNERPVT","OWNERNOREG","OWNERPUB", "OWNERREG"))

ecopolscalescores

PCA test on the new eco-pol scale - characteristics of the perceiver

# PCA test on qualities of the perceiver

ecopolscalepca <- ecopolscalescores %>%
  select(!starts_with(c("Risky","Gender","Caste","Religion","State")))%>%
  prcomp(center = TRUE,scale. = TRUE)


var_explained_ecopol = ecopolscalepca$sdev^2 / sum(ecopolscalepca$sdev^2)

summary(ecopolscalepca)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.4705 1.2373 1.1700 1.0027 0.96948 0.90729 0.86862
## Proportion of Variance 0.1966 0.1392 0.1245 0.0914 0.08545 0.07483 0.06859
## Cumulative Proportion  0.1966 0.3358 0.4602 0.5516 0.63706 0.71189 0.78048
##                            PC8     PC9    PC10    PC11
## Standard deviation     0.83091 0.80331 0.78471 0.68059
## Proportion of Variance 0.06276 0.05866 0.05598 0.04211
## Cumulative Proportion  0.84325 0.90191 0.95789 1.00000
qplot(c(1:11), var_explained_ecopol) + 
  geom_line() + 
  xlab("Principal Component") + 
  ylab("Variance Explained") +
  ggtitle("Scree Plot") +
  ylim(0.05, 0.20)+
  xlim(1,7)
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 row(s) containing missing values (geom_path).